// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstdint>

#include <turbo/base/endian.h>
#include <aries/parquet/platform.h>
#include <aries/parquet/schema.h>

namespace parquet::internal {
    struct PARQUET_EXPORT LevelInfo {
        LevelInfo()
            : null_slot_usage(1), def_level(0), rep_level(0), repeated_ancestor_def_level(0) {
        }

        LevelInfo(int32_t null_slots, int32_t definition_level, int32_t repetition_level,
                  int32_t repeated_ancestor_definition_level)
            : null_slot_usage(null_slots),
              def_level(static_cast<int16_t>(definition_level)),
              rep_level(static_cast<int16_t>(repetition_level)),
              repeated_ancestor_def_level(
                  static_cast<int16_t>(repeated_ancestor_definition_level)) {
        }

        bool operator==(const LevelInfo &b) const {
            return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
                   rep_level == b.rep_level &&
                   repeated_ancestor_def_level == b.repeated_ancestor_def_level;
        }

        bool HasNullableValues() const { return repeated_ancestor_def_level < def_level; }

        // How many slots an undefined but present (i.e. null) element in
        // parquet consumes when decoding to Arrow.
        // "Slot" is used in the same context as the Arrow specification
        // (i.e. a value holder).
        // This is only ever >1 for descendents of FixedSizeList.
        int32_t null_slot_usage = 1;

        // The definition level at which the value for the field
        // is considered not null (definition levels greater than
        // or equal to this value indicate a not-null
        // value for the field). For list fields definition levels
        // greater than or equal to this field indicate a present,
        // possibly null, child value.
        int16_t def_level = 0;

        // The repetition level corresponding to this element
        // or the closest repeated ancestor.  Any repetition
        // level less than this indicates either a new list OR
        // an empty list (which is determined in conjunction
        // with definition levels).
        int16_t rep_level = 0;

        // The definition level indicating the level at which the closest
        // repeated ancestor is not empty.  This is used to discriminate
        // between a value less than |def_level| being null or excluded entirely.
        // For instance if we have an arrow schema like:
        // list(struct(f0: int)).  Then then there are the following
        // definition levels:
        //   0 = null list
        //   1 = present but empty list.
        //   2 = a null value in the list
        //   3 = a non null struct but null integer.
        //   4 = a present integer.
        // When reconstructing, the struct and integer arrays'
        // repeated_ancestor_def_level would be 2.  Any
        // def_level < 2 indicates that there isn't a corresponding
        // child value in the list.
        // i.e. [null, [], [null], [{f0: null}], [{f0: 1}]]
        // has the def levels [0, 1, 2, 3, 4].  The actual
        // struct array is only of length 3: [not-set, set, set] and
        // the int array is also of length 3: [N/A, null, 1].
        //
        int16_t repeated_ancestor_def_level = 0;

        /// Increments levels according to the cardinality of node.
        void Increment(const schema::Node &node) {
            if (node.is_repeated()) {
                IncrementRepeated();
                return;
            }
            if (node.is_optional()) {
                IncrementOptional();
                return;
            }
        }

        /// Increments level for a optional node.
        void IncrementOptional() { def_level++; }

        /// Increments levels for the repeated node.  Returns
        /// the previous ancestor_list_def_level.
        int16_t IncrementRepeated() {
            int16_t last_repeated_ancestor = repeated_ancestor_def_level;

            // Repeated fields add both a repetition and definition level. This is used
            // to distinguish between an empty list and a list with an item in it.
            ++rep_level;
            ++def_level;
            // For levels >= repeated_ancestor_def_level it indicates the list was
            // non-null and had at least one element.  This is important
            // for later decoding because we need to add a slot for these
            // values.  for levels < current_def_level no slots are added
            // to arrays.
            repeated_ancestor_def_level = def_level;
            return last_repeated_ancestor;
        }

        // Calculates and returns LevelInfo for a column descriptor.
        static LevelInfo ComputeLevelInfo(const ColumnDescriptor *descr) {
            LevelInfo level_info;
            level_info.def_level = descr->max_definition_level();
            level_info.rep_level = descr->max_repetition_level();

            int16_t min_spaced_def_level = descr->max_definition_level();
            const ::parquet::schema::Node *node = descr->schema_node().get();
            while (node && !node->is_repeated()) {
                if (node->is_optional()) {
                    min_spaced_def_level--;
                }
                node = node->parent();
            }
            level_info.repeated_ancestor_def_level = min_spaced_def_level;
            return level_info;
        }

        friend std::ostream &operator<<(std::ostream &os, const LevelInfo &levels) {
            // This print method is to silence valgrind issues.  What's printed
            // is not important because all asserts happen directly on
            // members.
            os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
                    << ", repeated_ancestor_def=" << levels.repeated_ancestor_def_level;
            if (levels.null_slot_usage > 1) {
                os << ", null_slot_usage=" << levels.null_slot_usage;
            }
            os << "}";
            return os;
        }
    };

    // Input/Output structure for reconstructed validity bitmaps.
    struct PARQUET_EXPORT ValidityBitmapInputOutput {
        // Input only.
        // The maximum number of values_read expected (actual
        // values read must be less than or equal to this value).
        // If this number is exceeded methods will throw a
        // ParquetException. Exceeding this limit indicates
        // either a corrupt or incorrectly written file.
        int64_t values_read_upper_bound = 0;
        // Output only. The number of values added to the encountered
        // (this is logically the count of the number of elements
        // for an Arrow array).
        int64_t values_read = 0;
        // Input/Output. The number of nulls encountered.
        int64_t null_count = 0;
        // Output only. The validity bitmap to populate. Maybe be null only
        // for DefRepLevelsToListInfo (if all that is needed is list offsets).
        uint8_t *valid_bits = nullptr;
        // Input only, offset into valid_bits to start at.
        int64_t valid_bits_offset = 0;
    };

    //  Converts def_levels to validity bitmaps for non-list arrays and structs that have
    //  at least one member that is not a list and has no list descendents.
    //  For lists use DefRepLevelsToList and structs where all descendants contain
    //  a list use DefRepLevelsToBitmap.
    void PARQUET_EXPORT DefLevelsToBitmap(const int16_t *def_levels, int64_t num_def_levels,
                                          LevelInfo level_info,
                                          ValidityBitmapInputOutput *output);

    // Reconstructs a validity bitmap and list offsets for a list arrays based on
    // def/rep levels. The first element of offsets will not be modified if rep_levels
    // starts with a new list.  The first element of offsets will be used when calculating
    // the next offset.  See documentation onf DefLevelsToBitmap for when to use this
    // method vs the other ones in this file for reconstruction.
    //
    // Offsets must be sized to 1 + values_read_upper_bound.
    void PARQUET_EXPORT DefRepLevelsToList(const int16_t *def_levels,
                                           const int16_t *rep_levels, int64_t num_def_levels,
                                           LevelInfo level_info,
                                           ValidityBitmapInputOutput *output,
                                           int32_t *offsets);

    void PARQUET_EXPORT DefRepLevelsToList(const int16_t *def_levels,
                                           const int16_t *rep_levels, int64_t num_def_levels,
                                           LevelInfo level_info,
                                           ValidityBitmapInputOutput *output,
                                           int64_t *offsets);

    // Reconstructs a validity bitmap for a struct every member is a list or has
    // a list descendant.  See documentation on DefLevelsToBitmap for when more
    // details on this method compared to the other ones defined above.
    void PARQUET_EXPORT DefRepLevelsToBitmap(const int16_t *def_levels,
                                             const int16_t *rep_levels,
                                             int64_t num_def_levels, LevelInfo level_info,
                                             ValidityBitmapInputOutput *output);

    // This is exposed to ensure we can properly test a software simulated pext function
    // (i.e. it isn't hidden by runtime dispatch).
    uint64_t PARQUET_EXPORT TestOnlyExtractBitsSoftware(uint64_t bitmap, uint64_t selection);
} // namespace parquet::internal
